Visualize the following
import pandas as pd
channel_revenue = pd.read_csv('input_file.csv', skiprows=2,nrows=18)
channel_revenue
| Channel name | Paid/Free | Spend in $ | Visits | Orders | Revenue in $ | Week | |
|---|---|---|---|---|---|---|---|
| 0 | App install network A | Paid | 2978 | 50,000 | 550 | 32,615 | 42 |
| 1 | App install network B | Paid | 2342 | 600,000 | 7,200 | 398,160 | 42 |
| 2 | App install network C | Paid | 3345 | 500,000 | 5,000 | 235,000 | 42 |
| 3 | Web channel A | Paid | 3008 | 300,000 | 3,900 | 121,680 | 42 |
| 4 | Web channel B | Paid | 600 | 30,000 | 360 | 19,584 | 42 |
| 5 | Web channel C | Paid | 88 | 50,000 | 550 | 28,930 | 42 |
| 6 | Newsletters A | Free | 90 | 60,000 | 660 | 36,432 | 42 |
| 7 | Newsletters B | Free | 98 | 250,000 | 2,750 | 91,025 | 42 |
| 8 | PR | Free | 76 | 25,000 | 250 | 9,900 | 42 |
| 9 | App install network A | Paid | 309 | 40,000 | 715 | 39,138 | 43 |
| 10 | App install network B | Paid | 1009 | 480,000 | 9,360 | 278,712 | 43 |
| 11 | App install network C | Paid | 2398 | 450,000 | 4,000 | 305,500 | 43 |
| 12 | Web channel A | Paid | 3098 | 240,000 | 2,730 | 97,344 | 43 |
| 13 | Web channel B | Paid | 600 | 21,000 | 360 | 25,459 | 43 |
| 14 | Web channel C | Paid | 688 | 35,000 | 605 | 26,037 | 43 |
| 15 | Newsletters A | Free | 78 | 48,000 | 660 | 36,432 | 43 |
| 16 | Newsletters B | Free | 90 | 325,000 | 3,575 | 109,230 | 43 |
| 17 | PR | Free | 55 | 25,000 | 225 | 7,920 | 43 |
channel_revenue.dtypes
Channel name object Paid/Free object Spend in $ int64 Visits object Orders object Revenue in $ object Week int64 dtype: object
channel_revenue.Visits = channel_revenue.Visits.replace(to_replace = ',', value = '', regex=True).astype('int')
channel_revenue.Orders = channel_revenue.Orders.replace(to_replace = ',', value = '', regex=True).astype('int')
channel_revenue['Revenue in $'] = channel_revenue['Revenue in $'].replace(to_replace = ',', value = '', regex=True).astype('int')
channel_revenue.dtypes
Channel name object Paid/Free object Spend in $ int64 Visits int64 Orders int64 Revenue in $ int64 Week int64 dtype: object
channel_revenue.head(2)
| Channel name | Paid/Free | Spend in $ | Visits | Orders | Revenue in $ | Week | |
|---|---|---|---|---|---|---|---|
| 0 | App install network A | Paid | 2978 | 50000 | 550 | 32615 | 42 |
| 1 | App install network B | Paid | 2342 | 600000 | 7200 | 398160 | 42 |
channel_revenue.rename(columns={'Revenue in $': 'Revenue', 'Spend in $' : 'Spend'}, inplace=True)
channel_revenue['Conversion_Rate'] = (channel_revenue.Orders / channel_revenue.Visits)*100 # percentage
channel_revenue['Avg_Basket_Size'] = (channel_revenue.Revenue / channel_revenue.Orders) # $
channel_revenue['Cost_To_Income'] = (channel_revenue.Spend / channel_revenue.Revenue)*100 # let's represent as a %
channel_revenue['Revenue_per_Visitor'] = (channel_revenue.Revenue / channel_revenue.Visits) ## this will be in $ amount
channel_revenue.head(2)
| Channel name | Paid/Free | Spend | Visits | Orders | Revenue | Week | Conversion_Rate | Avg_Basket_Size | Cost_To_Income | Revenue_per_Visitor | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | App install network A | Paid | 2978 | 50000 | 550 | 32615 | 42 | 1.1 | 59.3 | 9.130768 | 0.6523 |
| 1 | App install network B | Paid | 2342 | 600000 | 7200 | 398160 | 42 | 1.2 | 55.3 | 0.588206 | 0.6636 |
channel_revenue.describe()
| Spend | Visits | Orders | Revenue | Week | Conversion_Rate | Avg_Basket_Size | Cost_To_Income | Revenue_per_Visitor | |
|---|---|---|---|---|---|---|---|---|---|
| count | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 |
| mean | 1163.888889 | 196055.555556 | 2413.888889 | 105505.444444 | 42.500000 | 1.260097 | 47.719843 | 1.622988 | 0.600107 |
| std | 1281.323116 | 200450.220378 | 2671.174315 | 118037.346830 | 0.514496 | 0.320647 | 13.858891 | 2.157772 | 0.232688 |
| min | 55.000000 | 21000.000000 | 225.000000 | 7920.000000 | 42.000000 | 0.888889 | 29.776923 | 0.082395 | 0.316800 |
| 25% | 90.000000 | 36250.000000 | 550.000000 | 26760.250000 | 42.000000 | 1.100000 | 35.314286 | 0.318642 | 0.405600 |
| 50% | 600.000000 | 55000.000000 | 687.500000 | 37785.000000 | 42.500000 | 1.118750 | 49.800000 | 0.776310 | 0.593925 |
| 75% | 2384.000000 | 318750.000000 | 3818.750000 | 118567.500000 | 43.000000 | 1.356250 | 55.200000 | 2.443226 | 0.675067 |
| max | 3345.000000 | 600000.000000 | 9360.000000 | 398160.000000 | 43.000000 | 1.950000 | 76.375000 | 9.130768 | 1.212333 |
Mean CIR is lower for 43: 1.23% as opposed to 2.01%: this means in week 43 the company had to spend 78 cents less to make 1$ of revenue from the average channel. This could be the result of particular promos/campaigns in Week 43.
Mean RVP in week 43 is higher: 66 cents per visitor for the avg channel vs 53 cents
week_42 = channel_revenue.loc[channel_revenue.Week == 42].describe().loc['mean',['Cost_To_Income','Revenue_per_Visitor','Spend','Visits','Orders']]
week_43 = channel_revenue.loc[channel_revenue.Week == 43].describe().loc['mean',['Cost_To_Income','Revenue_per_Visitor','Spend','Visits','Orders']]
df = pd.concat([week_42, week_43], axis=1,ignore_index=True)
df.rename(columns={0:'Week 42 Mean',1:'Week 43 Mean'},inplace=True)
df
| Week 42 Mean | Week 43 Mean | |
|---|---|---|
| Cost_To_Income | 2.011635 | 1.234341 |
| Revenue_per_Visitor | 0.532244 | 0.667970 |
| Spend | 1402.777778 | 925.000000 |
| Visits | 207222.222222 | 184888.888889 |
| Orders | 2357.777778 | 2470.000000 |
paid_v_free = channel_revenue.groupby(by=['Week','Paid/Free'],).agg({'Spend': [sum, 'mean'],
'Visits': [sum, 'mean'], 'Orders':[sum,'mean'],
'Revenue': [sum, 'mean']
}).sort_values('Week')
paid_v_free
| Spend | Visits | Orders | Revenue | ||||||
|---|---|---|---|---|---|---|---|---|---|
| sum | mean | sum | mean | sum | mean | sum | mean | ||
| Week | Paid/Free | ||||||||
| 42 | Free | 264 | 88.000000 | 335000 | 111666.666667 | 3660 | 1220.000000 | 137357 | 45785.666667 |
| Paid | 12361 | 2060.166667 | 1530000 | 255000.000000 | 17560 | 2926.666667 | 835969 | 139328.166667 | |
| 43 | Free | 223 | 74.333333 | 398000 | 132666.666667 | 4460 | 1486.666667 | 153582 | 51194.000000 |
| Paid | 8102 | 1350.333333 | 1266000 | 211000.000000 | 17770 | 2961.666667 | 772190 | 128698.333333 | |
paid_v_free['Conversion_Rate'] = (paid_v_free[('Orders', 'sum')] / paid_v_free[('Visits', 'sum')])*100
paid_v_free['Avg_Basket_Size'] = (paid_v_free[('Revenue', 'sum')]/ paid_v_free[('Orders', 'sum')]) # $
paid_v_free['Cost_To_Income'] = (paid_v_free[('Spend', 'sum')] / paid_v_free[('Revenue', 'sum')])*100
paid_v_free['Revenue_per_Visitor'] = (paid_v_free[('Revenue', 'sum')] / paid_v_free[('Visits', 'sum')])
# let's convert ratio to %
paid_v_free = paid_v_free.reset_index()
paid_v_free['Week and Category'] = paid_v_free['Week'].astype(str) + "-" + paid_v_free['Paid/Free'].astype(str)
import plotly.express as px
from plotly.subplots import make_subplots
All Plotly visualizations are interactive - you can hover over the individual bars or pie chart sections for more information.
import plotly.graph_objects as go
import plotly.subplots as sp
fig1 = px.bar(paid_v_free,x='Week and Category', y='Conversion_Rate', color="Paid/Free",
color_discrete_sequence=px.colors.qualitative.Pastel1)
fig2 = px.bar(paid_v_free,x='Week and Category', y='Avg_Basket_Size', color="Paid/Free",
color_discrete_sequence=px.colors.qualitative.Pastel1)
fig3 = px.bar(paid_v_free,x='Week and Category', y='Revenue_per_Visitor', color="Paid/Free",
width=730, height=400,
color_discrete_sequence=px.colors.qualitative.Pastel1)
fig1_traces, fig2_traces, fig3_traces = [],[],[]
for trace in range(len(fig1["data"])):
fig1_traces.append(fig1["data"][trace])
for trace in range(len(fig2["data"])):
fig2_traces.append(fig2["data"][trace])
fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("Conversion Rate (%)", "Average Basket Size ($)"))
# Add the traces to the main plot
for traces in fig1_traces:
fig.append_trace(traces, row=1,col=1)
for traces in fig2_traces:
fig.append_trace(traces, row=1,col=2)
fig.update_xaxes(categoryorder='category ascending')
fig.update_layout(barmode='relative',bargap=0.3,bargroupgap=0.0, showlegend=False, height=400)
fig.show()
fig3.update_xaxes(categoryorder='category ascending')
fig3.update_layout(showlegend=True, margin_l=300,
title_text="Revenue per Visitor ($)", title_xanchor='left', title_pad_l=330,
yaxis_visible=False)
fig3.show()
As expected CR, ABS and RPV are higher for the paid category channels, in both weeks.
Though from the viz below, it may appear the 'free' categories have a much better (lower) cost to income ratio, it's important to note that these channels (like Newsletters), may saturate after a point.
Spending on newsletters to the level of paid channels would not bring paid-channel level returns.
Also, 'free' channels like newsletters are meant more to promote brand engagement than increase revenue/conversions. And in fact, spamming the same users with more newsletters could have negative consequences on brand image.
So this is one visualization that could be misleading without understanding the full context.
This visualization is also interactive
fig = px.bar(paid_v_free,x='Week and Category', y='Cost_To_Income',
labels={'Cost_To_Income': 'Cost to Income (%)'},
width=800,height=400,color='Paid/Free',
color_discrete_sequence=px.colors.qualitative.Pastel)
fig.update_xaxes(categoryorder='category ascending')
fig.show()
It intuitively makes sense that as visits increase, the number of orders should too. And this is the trend observed in the scatter plot below, with a regression line fit.
This visualization is also interactive.
fig = px.scatter(channel_revenue,x='Visits', y='Orders',
width=800,height=400,
color_discrete_sequence=px.colors.qualitative.Pastel,
trendline='ols',
hover_data=['Channel name','Week'])
fig.show()
When fitting a trendline (OLS - Ordinary Least Squares) of Orders vs Visits, we see that there are a few that don't exactly fit the regression line or are further away from it: for example, in the 3 points between 400 and 500K visitors, there is one that has much higher # of Orders.
Hovering over it, we see that it is for Channel 'App install network B' in Week 43. It is worth looking into what could have caused this increase:
This visualization is also interactive
channel_groups = channel_revenue.groupby(by=['Channel name'],).agg({'Spend': sum,
'Visits': sum, 'Orders': sum,
'Revenue': sum})
channel_groups['Conversion_Rate'] = (channel_groups.Orders / channel_groups.Visits)*100 # percentage
channel_groups['Avg_Basket_Size'] = (channel_groups.Revenue / channel_groups.Orders) # $
channel_groups['Cost_To_Income'] = (channel_groups.Spend / channel_groups.Revenue)*100
channel_groups['Revenue_per_Visitor'] = (channel_groups.Revenue / channel_groups.Visits) ## this will be in $ amount
fig1 = px.bar(channel_groups, y='Conversion_Rate',
color_discrete_sequence=px.colors.qualitative.Pastel)
fig2 = px.bar(channel_groups, y='Avg_Basket_Size',
labels={'Conversion_Rate':'Conversion Rate (%)'},
color_discrete_sequence=px.colors.qualitative.Pastel)
fig3 = px.bar(channel_groups, y='Revenue_per_Visitor',
labels={'Conversion_Rate':'Conversion Rate (%)'},
color_discrete_sequence=px.colors.qualitative.Pastel)
fig4 = px.bar(channel_groups, y='Cost_To_Income',
color_discrete_sequence=px.colors.qualitative.Pastel)
fig = make_subplots(rows=2, cols=2, shared_xaxes=True, shared_yaxes=False,
subplot_titles=("Conversion Rate (%)", "Average Basket Size ($)", "Revenue per Visitor ($)",
"Cost to Income Ratio (%)"))
fig.add_trace(fig1['data'][0], row=1, col=1)
fig.add_trace(fig2['data'][0], row=1, col=2)
fig.add_trace(fig3['data'][0], row=2, col=1)
fig.add_trace(fig4['data'][0], row=2, col= 2)
fig.update_layout(height=500, width=1000,
title_text="Metrics by Channel Name")
fig.show()
It's easy to see which channels are better performing from these bar graphs -
This visualization is also interactive
fig = make_subplots(rows=1, cols=2, shared_xaxes=False, shared_yaxes=False,
specs=[[{"type": "pie"}, {"type": "pie"}]],
subplot_titles=("Spend Share per Channel", "Revenue Share per Channel"))
fig2 = go.Pie(values=channel_groups.Revenue, labels=channel_groups.index, marker=dict(colors=px.colors.qualitative.Pastel))
fig1 = go.Pie(values=channel_groups.Spend, labels=channel_groups.index, marker=dict(colors=px.colors.qualitative.Pastel))
fig.add_trace(fig1, row=1, col=1)
fig.add_trace(fig2, row=1, col=2)
fig.show()
Let's now take a closer look at paid channels
This visualization is also interactive
week42_paid = channel_revenue.loc[(channel_revenue['Week'] == 42) & (channel_revenue['Paid/Free'].isin(['Paid']))]
fig = px.pie(week42_paid, values='Revenue', names='Channel name', title='Week 42 \'Paid\' Revenue Share',
color_discrete_sequence=px.colors.qualitative.Pastel1, height=500, width=500)
fig.show()
week42_paid['Conversion_Rate'] = (week42_paid.Orders / week42_paid.Visits)*100 # percentage
week42_paid['Avg_Basket_Size'] = (week42_paid.Revenue / week42_paid.Orders) # $
week42_paid['Cost_To_Income'] = (week42_paid.Spend / week42_paid.Revenue)*100
week42_paid['Revenue_per_Visitor'] = (week42_paid.Revenue / week42_paid.Visits) ## this will be in $ amount
fig1 = px.bar(week42_paid, y='Conversion_Rate', x='Channel name',
color_discrete_sequence=px.colors.qualitative.Pastel1)
fig2 = px.bar(week42_paid, y='Avg_Basket_Size', x='Channel name',
labels={'Conversion_Rate':'Conversion Rate (%)'},
color_discrete_sequence=px.colors.qualitative.Pastel1)
fig3 = px.bar(week42_paid, y='Revenue_per_Visitor', x='Channel name',
labels={'Conversion_Rate':'Conversion Rate (%)'},
color_discrete_sequence=px.colors.qualitative.Pastel1)
fig4 = px.bar(week42_paid, y='Cost_To_Income', x='Channel name',
color_discrete_sequence=px.colors.qualitative.Pastel1)
fig = make_subplots(rows=2, cols=2, shared_xaxes=True, shared_yaxes=False,
subplot_titles=("Conversion Rate (%)", "Average Basket Size ($)", "Revenue per Visitor ($)",
"Cost to Income Ratio (%)"))
fig.add_trace(fig1['data'][0], row=1, col=1)
fig.add_trace(fig2['data'][0], row=1, col=2)
fig.add_trace(fig3['data'][0], row=2, col=1)
fig.add_trace(fig4['data'][0], row=2, col=2)
fig.update_layout(height=500, width=1000,
title_text="Metrics by Paid Channel Name")
fig.show()
This visualization is also interactive
week43_paid = channel_revenue.loc[(channel_revenue['Week'] == 43) & (channel_revenue['Paid/Free'].isin(['Paid']))]
fig = px.pie(week43_paid, values='Revenue', names='Channel name', title='Week 43 Revenue Share',height=500, width=500,
color_discrete_sequence=px.colors.qualitative.Pastel1)
fig.show()
week43_paid['Conversion_Rate'] = (week43_paid.Orders / week43_paid.Visits)*100 # percentage
week43_paid['Avg_Basket_Size'] = (week43_paid.Revenue / week43_paid.Orders) # $
week43_paid['Cost_To_Income'] = (week43_paid.Spend / week43_paid.Revenue)*100
week43_paid['Revenue_per_Visitor'] = (week43_paid.Revenue / week43_paid.Visits) ## this will be in $ amount
fig1 = px.bar(week43_paid, y='Conversion_Rate', x='Channel name',
color_discrete_sequence=px.colors.qualitative.Pastel2)
fig2 = px.bar(week43_paid, y='Avg_Basket_Size', x='Channel name',
labels={'Conversion_Rate':'Conversion Rate (%)'},
color_discrete_sequence=px.colors.qualitative.Pastel2)
fig3 = px.bar(week43_paid, y='Revenue_per_Visitor', x='Channel name',
labels={'Conversion_Rate':'Conversion Rate (%)'},
color_discrete_sequence=px.colors.qualitative.Pastel2)
fig4 = px.bar(week43_paid, y='Cost_To_Income', x='Channel name',
color_discrete_sequence=px.colors.qualitative.Pastel2)
fig = make_subplots(rows=2, cols=2, shared_xaxes=True, shared_yaxes=False,
subplot_titles=("Conversion Rate (%)", "Average Basket Size ($)", "Revenue per Visitor ($)",
"Cost to Income Ratio (%)"))
fig.add_trace(fig1['data'][0], row=1, col=1)
fig.add_trace(fig2['data'][0], row=1, col=2)
fig.add_trace(fig3['data'][0], row=2, col=1)
fig.add_trace(fig4['data'][0], row=2, col=2)
fig.update_layout(height=500, width=1000,
title_text="Metrics by Paid Channel Name")
fig.show()